/*=============================================================================================
This .do file merges labor and population data from 1971-1997 editions of 
the Eurostat Regional Yearbook.

*=============================================================================================*/

clear all
set more off

* Combine the labor and population yearbook extracts into one region-year
* panel. merge 1:1 requires (nuts, year) to identify observations uniquely in
* both files; nogen suppresses the _merge variable, so rows found in only one
* of the two files are kept without a marker.
use "$dta_files/EYB_labor_1968_1996.dta", clear
merge 1:1 nuts year using "$dta_files/EYB_pop_1968_1996.dta", nogen

* Scratch copy that the later sections reload. The macro created by tempfile
* is called temp_all, so it is dereferenced by that bare name (no .dta suffix
* inside the macro quotes). The path is quoted in case the temporary directory
* contains spaces.
tempfile temp_all
save "`temp_all'"

*-------------------------------------------------------------------------------
* 1. separate regions BE24 and BE31 from Brabant, BE1&BE24&BE31 
*    Both are recovered as residuals of their NUTS1 totals, for the yearbook
*    years between 1973 and 1992:
*      BE24 = BE2 minus (BE21 + BE22 + BE23 + BE25)
*      BE31 = BE3 minus (BE32 + BE33 + BE34 + BE35)
*-------------------------------------------------------------------------------

* mkmat copies observations in their current order and the matrices below are
* combined row by row, so fix the order explicitly instead of relying on
* whatever order the preceding merge left behind.
sort nuts year

local eybvars LF_eyb EMP_eyb UNEMP_eyb POP_eyb

foreach r in BE2 BE21 BE22 BE23 BE25 BE3 BE32 BE33 BE34 BE35 {
	mkmat `eybvars' if nuts=="`r'" & inrange(year, 1973, 1992), matrix(`r')
	mkmat year      if nuts=="`r'" & inrange(year, 1973, 1992), matrix(Y_`r')

	* Row i must refer to the same year in every matrix, otherwise the
	* subtraction below would silently mix years.
	if mreldif(Y_`r', Y_BE2) != 0 {
		display as error "`r' does not cover the same years as BE2 in 1973-1992"
		exit 459
	}
}

matrix sumBE2 = BE21 + BE22 + BE23 + BE25
matrix sumBE3 = BE32 + BE33 + BE34 + BE35

matrix BE24 = BE2 - sumBE2
matrix BE31 = BE3 - sumBE3

* Carry the year along as a fifth column so it comes from the data rather than
* from a hand-typed list.
matrix BE24y = BE24, Y_BE2
matrix BE31y = BE31, Y_BE2

* Scratch datasets live in tempfiles, so nothing is written to (or has to be
* removed from) the working directory.
tempfile be24 BE_24_31

* svmat stores float unless told otherwise; double keeps the full precision of
* the matrix differences.
clear
svmat double BE24y, names(labor)
gen nuts = "BE24"
save "`be24'"

clear
svmat double BE31y, names(labor)
gen nuts = "BE31"
append using "`be24'"

* Column 5 holds the year; columns 1-4 follow the order of the mkmat varlist.
gen year = labor5
drop labor5
rename labor1 LF_eyb
rename labor2 EMP_eyb
rename labor3 UNEMP_eyb
rename labor4 POP_eyb

save "`BE_24_31'"

*-------------------------------------------------------------------------------
* 2. Add duplicates for NUTS2==NUTS1 regions (e.g. add DE30 to DE3)
*-------------------------------------------------------------------------------
* The tempfile macros are dereferenced by their bare names (no .dta suffix
* inside the macro quotes), with the path quoted in case it contains spaces.
use "`temp_all'", clear

* DE3, DE5 and DE6 are copied under the codes DE30, DE50 and DE60.
keep if inlist(nuts, "DE3", "DE5", "DE6")
replace nuts = nuts + "0"

* force keeps the first observation of any duplicated (nuts, year) pair
* without raising an error. NOTE(review): the input comes from a 1:1 merge on
* (nuts, year), so this is presumably a safeguard only.
duplicates drop nuts year, force

tempfile DE
save "`DE'"

*-------------------------------------------------------------------------------
* 3. Aggregate regions of IT to NUTS1 regions
*    (also NL31-NL34 to NL3 for the years 1973, 1975 and 1977)
*-------------------------------------------------------------------------------

use "`temp_all'", clear

* nuts1 is the aggregation target; rows that are not assigned one are dropped.
* Some source rows are already combinations of regions (codes joined by &);
* most of those are only used for 1968.
gen nuts1 = ""

replace nuts1 = "ITC" if inlist(nuts, "ITC1", "ITC2", "ITC3", "ITC4") | ///
	(nuts=="ITC1&ITC2&ITC3" & year==1968)

replace nuts1 = "ITF" if ///
	inlist(nuts, "ITF1", "ITF2", "ITF3", "ITF4", "ITF5", "ITF6", "ITF1&ITF2") | ///
	(nuts=="ITF4&ITF5&ITF6" & year==1968)

replace nuts1 = "ITH" if inlist(nuts, "ITH1&ITH2", "ITH3", "ITH4", "ITH5") | ///
	(nuts=="ITH1&ITH2&ITH3&ITH4" & year==1968)

replace nuts1 = "ITI" if inlist(nuts, "ITI1", "ITI2", "ITI3", "ITI4") | ///
	(nuts=="ITI1&ITI2&ITI3" & year==1968)

replace nuts1 = "ITG" if inlist(nuts, "ITG1", "ITG2")

* The command ends on its last condition, with no trailing line continuation.
replace nuts1 = "NL3" if inlist(nuts, "NL31", "NL32", "NL33", "NL34") ///
	& inlist(year, 1973, 1975, 1977)

drop if nuts1==""

* missing values make aggregation impossible for these regions
drop if year==1990 & inlist(nuts1, "ITC", "ITF", "ITH", "ITI", "ITG")

* collapse (sum) counts a missing component as zero, so a total whose
* components are all missing comes out as 0 rather than missing.
collapse (sum) *_eyb (first) region_name, by(year nuts1)
rename nuts1 nuts

*-------------------------------------------------------------------------------
* 4. Append data. Replace "0" with "." 
*-------------------------------------------------------------------------------

* In memory at this point: the NUTS1 aggregates built in section 3. Add the
* full merged panel, the DE copies and the BE24/BE31 residuals.
append using "`temp_all'"
append using "`DE'"
append using "`BE_24_31'"

*-------------------------------------------------------------------------------
* Eurostat Yearbook Data for many ES regions in 1985 has outliers 
*-------------------------------------------------------------------------------
* This has to run after the appends. Before them only the section 3 aggregates
* (ITC, ITF, ITG, ITH, ITI, NL3) are in memory, so a filter on ES regions
* would not match any observation.
foreach stem in LF EMP UNEMP {
	replace `stem'_eyb = . if year==1985 & substr(nuts, 1, 2)=="ES"
}

* Zeros are recoded to missing in every variable matched by these patterns.
foreach var of varlist LF* EMP* UNEMP* POP* {
	replace `var' = . if `var'==0
}

label var LF_eyb "Labor force, Eurostat yearbook"
label var EMP_eyb "Employed, Eurostat yearbook"
label var UNEMP_eyb "Unemployed, Eurostat yearbook"
label var POP_eyb "Population, Eurostat yearbook"

sort nuts year
save "$dta_files/EYB_1968_1996_all.dta", replace

* NOTE: this deletes the two input files, so the script cannot be re-run
* unless they are regenerated first.
erase "$dta_files/EYB_labor_1968_1996.dta"
erase "$dta_files/EYB_pop_1968_1996.dta"
